# Import Required Libraries

import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
import seaborn as sns
import itertools
from collections import Counter, defaultdict
import scipy.stats as stats
import math


# Table 2

ses = pd.read_excel('SES_Scores.xlsx')

# Define main path for raw election file
main_path = '/Users/Election Results'
sub_path_list = os.listdir(main_path)
# del sub_path_list[1]  # Remove not needed element if it is existed

for sub_path in sub_path_list:
    ses[sub_path + '_turnout'] = ''
    ses[sub_path + '_winner'] = ''
    ses[sub_path + '_winner_ratio'] = ''
    excel_list = os.listdir(main_path + '/' + sub_path)
    for excel_name in excel_list:
        data = pd.read_excel(main_path + '/' + sub_path + '/' + excel_name)
        data.columns = data.loc[9]
        data = data.loc[10:].reset_index(drop=True)
        data = data.groupby(['İl Adı','İlçe Adı','Mahalle/Köy']).sum().reset_index()
        for i in range(len(data)):
            muhtarlik = data.loc[i]['Mahalle/Köy']
            if muhtarlik[-6] == ' ':
                muhtarlik = muhtarlik[0:-6]
            else:
                muhtarlik = muhtarlik[0:-5]
            if muhtarlik in list(ses[ses['İLÇE ADI'] == data.loc[i]['İlçe Adı']]['MAHALLE ADI']):
                index_no = ses[ses['İLÇE ADI'] == data.loc[i]['İlçe Adı']]['MAHALLE ADI'][
                    ses[ses['İLÇE ADI'] == data.loc[i]['İlçe Adı']]['MAHALLE ADI'] == muhtarlik].index[0]
                ses.at[index_no, sub_path + '_turnout'] = data.loc[i]['Oy Kullanan Seçmen Sayısı'] / data.loc[i]['Kayıtlı Seçmen Sayısı']
                excluded_cols = ['Oy Kullanan Seçmen Sayısı', 'Kayıtlı Seçmen Sayısı', 'Toplam Geçerli Oy', 'Geçersiz Oylar','Toplam Geçersiz Oy',
                                 'İl Adı', 'İlçe Adı', 'Mahalle/Köy','Sıra No','Sandık No','İtirazsız Geçerli Oy Sayısı','İtirazlı Geçerli Oy Sayısı']
                party_votes = data.loc[i].drop(labels=excluded_cols, errors='ignore')
                if not party_votes.empty and data.loc[i]['Toplam Geçerli Oy'] > 0:
                    winner_party = party_votes.astype(float).idxmax()
                    winner_votes = party_votes.astype(float).max()
                    vote_ratio = winner_votes / data.loc[i]['Toplam Geçerli Oy']
                    ses.at[index_no, sub_path + '_winner'] = winner_party
                    ses.at[index_no, sub_path + '_winner_ratio'] = vote_ratio
                else:
                    ses.at[index_no, sub_path + '_winner'] = ''
                    ses.at[index_no, sub_path + '_winner_ratio'] = 0

order = ['A+', 'A', 'B+', 'B', 'C+', 'C', 'D', 'E']

table2={}
for rank in order:
    my_list={}
    my_list['istanbul_bb_2019_winner'] = Counter(ses[ses['SES']==rank]['istanbul_bb_2019_winner'])
    my_list['istanbul_bb_2024_winner'] = Counter(ses[ses['SES']==rank]['istanbul_bb_2024_winner'])
    my_list['istanbul_cb_winner'] = Counter(ses[ses['SES']==rank]['istanbul_cb_winner'])
    table2[rank] = my_list

print(table2)

istanbul_2024_blank_winners = ses[ses['istanbul_bb_2024_winner'] == '']
cb_blank_winners = ses[ses['istanbul_cb_winner'] == '']

print(istanbul_2024_blank_winners[['İLÇE ADI', 'MAHALLE ADI', 'istanbul_bb_2024_turnout']])
print(cb_blank_winners[['İLÇE ADI', 'MAHALLE ADI', 'istanbul_cb_turnout']])


# Figures

ses = pd.read_excel('SES_Scores.xlsx')

# Define main path for raw election file
main_path = '/Users/Election Results'
sub_path_list = os.listdir(main_path)
# del sub_path_list[1]  # Remove not needed element if it is existed

for sub_path in sub_path_list:
    ses[sub_path + '_turnout'] = ''
    ses[sub_path + '_chp'] = ''
    excel_list = os.listdir(main_path + '/' + sub_path)
    for excel_name in excel_list:
        data = pd.read_excel(main_path + '/' + sub_path + '/' + excel_name)
        data.columns = data.loc[9]
        data = data.loc[10:].reset_index(drop=True)
        data = data.groupby(['İl Adı','İlçe Adı','Mahalle/Köy']).sum().reset_index()
        for i in range(len(data)):
            muhtarlik = data.loc[i]['Mahalle/Köy']
            if muhtarlik[-6] == ' ':
                muhtarlik = muhtarlik[0:-6]
            else:
                muhtarlik = muhtarlik[0:-5]

            if muhtarlik in list(ses[ses['İLÇE ADI'] == data.loc[i]['İlçe Adı']]['MAHALLE ADI']):
                index_no = ses[ses['İLÇE ADI'] == data.loc[i]['İlçe Adı']]['MAHALLE ADI'][
                    ses[ses['İLÇE ADI'] == data.loc[i]['İlçe Adı']]['MAHALLE ADI'] == muhtarlik
                ].index[0]
                ses.at[index_no, sub_path + '_turnout'] = data.loc[i]['Oy Kullanan Seçmen Sayısı'] / data.loc[i]['Kayıtlı Seçmen Sayısı']
                if 'CHP' in data.loc[i]:
                    ses.at[index_no, sub_path + '_chp'] = data.loc[i]['CHP'] / data.loc[i]['Toplam Geçerli Oy']
                else:
                    ses.at[index_no, sub_path + '_chp'] = 0 


my_list = ['istanbul_bb_2019_chp', 'istanbul_ilce_2019_chp', 'istanbul_meclis_2019_chp','istanbul_cb_chp', 'istanbul_bb_2024_chp', 'istanbul_ilce_2024_chp','istanbul_meclis_2024_chp']

pairs = list(itertools.combinations(my_list, 2))

for pair in pairs:
    col1, col2 = pair
    ses[col1] = pd.to_numeric(ses[col1], errors='coerce')
    ses[col2] = pd.to_numeric(ses[col2], errors='coerce')
    ses[col2 + '_' + col1 + '_dif'] = np.where((ses[col1] == 0) | (ses[col2] == 0),0,ses[col2] - ses[col1])
    ses[col2 + '_' + col1 + '_ratio'] = np.where((ses[col1] == 0) | (ses[col2] == 0),0,ses[col2] / ses[col1])

# For the specific pair: 'istanbul_meclis_2024_chp' and 'istanbul_bb_2024_chp'
col1 = 'istanbul_meclis_2024_chp'
col2 = 'istanbul_bb_2024_chp'

ses[col1] = pd.to_numeric(ses[col1], errors='coerce')
ses[col2] = pd.to_numeric(ses[col2], errors='coerce')

ses[col2 + '_' + col1 + '_dif'] = np.where((ses[col1] == 0) | (ses[col2] == 0),0,ses[col2] - ses[col1])
ses[col2 + '_' + col1 + '_ratio'] = np.where((ses[col1] == 0) | (ses[col2] == 0),0,ses[col2] / ses[col1])

ses.replace([np.inf, -np.inf], np.nan, inplace=True)

plt.style.use('default')
plt.rcParams['figure.facecolor'] = 'white'

def ensure_numeric(df, col):
    df[col] = pd.to_numeric(df[col], errors='coerce')

def aggregate_by_SES(df, col, order):
    """
    Remove observations where the target column is missing OR equals 0.
    Then group by SES and compute the mean.
    """
    df_clean = df.dropna(subset=[col]).copy()
    df_clean = df_clean[df_clean[col] != 0]
    ensure_numeric(df_clean, col)
    agg = df_clean.groupby('SES')[col].mean()
    agg = agg.reindex(order)
    return agg.dropna().reset_index()

agg_prop = aggregate_by_SES(ses, 'istanbul_bb_2024_chp_istanbul_bb_2019_chp_dif', order)
agg_prop.rename(columns={'istanbul_bb_2024_chp_istanbul_bb_2019_chp_dif': 'prop_diff'}, inplace=True)
agg_prop['prop_diff'] *= 100
agg_abs = aggregate_by_SES(ses, 'istanbul_bb_2024_chp_istanbul_bb_2019_chp_ratio', order)
agg_abs.rename(columns={'istanbul_bb_2024_chp_istanbul_bb_2019_chp_ratio': 'relative_diff'}, inplace=True)
agg_abs['relative_diff'] = (agg_abs['relative_diff'] - 1) * 100
merged = pd.merge(agg_prop, agg_abs, on='SES')
x = np.arange(len(merged)) 
width = 0.35 
fig, ax1 = plt.subplots(figsize=(10,6))
ax2 = ax1.twinx()
bars1 = ax1.bar(x - width/2, merged['prop_diff'], width, label='Absolute Difference (%)', color='lightblue')
bars2 = ax2.bar(x + width/2, merged['relative_diff'], width, label='Relative Difference (%)', color='salmon')
ax1.set_xlabel('SES Scores')
ax1.set_ylabel('Absolute Vote Difference (%)', color='black')
ax2.set_ylabel('Relative Vote Difference (%)', color='black')
ax1.tick_params(axis='y', labelcolor='black')
ax2.tick_params(axis='y', labelcolor='black')
ax1.set_ylim(0, 8)
ax2.set_ylim(0, 30)
plt.xticks(x, merged['SES'])
plt.title('2024 IBB - 2019 IBB Elections Vote Differences of CHP by SES Scores')
fig.legend(loc="upper left", bbox_to_anchor=(0.1,0.9))
plt.tight_layout()
plt.savefig('Results/2024-2019_IBB_Merged_SideBySide.png', dpi=300)
plt.show()

agg_meclis_diff_prop = aggregate_by_SES(ses, 'istanbul_bb_2024_chp_istanbul_meclis_2019_chp_dif', order)
agg_meclis_diff_prop.rename(columns={'istanbul_bb_2024_chp_istanbul_meclis_2019_chp_dif': 'prop_diff'}, inplace=True)
agg_meclis_diff_prop['prop_diff'] *= 100
agg_meclis_diff_abs = aggregate_by_SES(ses, 'istanbul_bb_2024_chp_istanbul_meclis_2019_chp_ratio', order)
agg_meclis_diff_abs.rename(columns={'istanbul_bb_2024_chp_istanbul_meclis_2019_chp_ratio': 'relative_diff'}, inplace=True)
agg_meclis_diff_abs['relative_diff'] = (agg_meclis_diff_abs['relative_diff'] - 1) * 100
merged_meclis = pd.merge(agg_meclis_diff_prop, agg_meclis_diff_abs, on='SES')
x = np.arange(len(merged_meclis))
width = 0.35
fig, ax1 = plt.subplots(figsize=(10, 6))
ax2 = ax1.twinx()
bars1 = ax1.bar(x - width/2, merged_meclis['prop_diff'], width, label='Absolute Difference (%)', color='lightblue')
bars2 = ax2.bar(x + width/2, merged_meclis['relative_diff'], width, label='Relative Difference (%)', color='salmon')
ax1.set_ylabel('Absolute Vote Difference (%)', color='black')
ax2.set_ylabel('Relative Vote Difference (%)', color='black')
ax1.tick_params(axis='y', labelcolor='black')
ax2.tick_params(axis='y', labelcolor='black')
ax1.set_xlabel('SES Scores')
plt.xticks(x, merged_meclis['SES'])
ax1.set_ylim(0, 25)
ax2.set_ylim(0, 140)
plt.title('2024 IBB - 2019 IBB Council Elections Vote Differences of CHP by SES Scores')
fig.legend(loc="upper left", bbox_to_anchor=(0.1, 0.9))
plt.tight_layout()
plt.savefig('Results/2024_IBB_vs_2019_IBBC_Merged_SideBySide.png', dpi=300)
plt.show()

agg_meclis_diff_prop = aggregate_by_SES(ses, 'istanbul_bb_2024_chp_istanbul_cb_chp_dif', order)
agg_meclis_diff_prop.rename(columns={'istanbul_bb_2024_chp_istanbul_cb_chp_dif': 'prop_diff'}, inplace=True)
agg_meclis_diff_prop['prop_diff'] *= 100
agg_meclis_diff_abs = aggregate_by_SES(ses, 'istanbul_bb_2024_chp_istanbul_cb_chp_ratio', order)
agg_meclis_diff_abs.rename(columns={'istanbul_bb_2024_chp_istanbul_cb_chp_ratio': 'relative_diff'}, inplace=True)
agg_meclis_diff_abs['relative_diff'] = (agg_meclis_diff_abs['relative_diff'] - 1) * 100
merged_meclis = pd.merge(agg_meclis_diff_prop, agg_meclis_diff_abs, on='SES')
x = np.arange(len(merged_meclis))
width = 0.35
fig, ax1 = plt.subplots(figsize=(10, 6))
ax2 = ax1.twinx()
bars1 = ax1.bar(x - width/2, merged_meclis['prop_diff'], width, label='Absolute Difference (%)', color='lightblue')
bars2 = ax2.bar(x + width/2, merged_meclis['relative_diff'], width, label='Relative Difference (%)', color='salmon')
ax1.set_ylabel('Absolute Vote Difference (%)', color='black')
ax2.set_ylabel('Relative Vote Difference (%)', color='black')
ax1.tick_params(axis='y', labelcolor='black')
ax2.tick_params(axis='y', labelcolor='black')
ax1.set_xlabel('SES Scores')
plt.xticks(x, merged_meclis['SES'])
ax1.set_ylim(0, 30)
ax2.set_ylim(0, 140)
plt.title('2024 IBB - 2023 General Elections Vote Differences of CHP by SES Scores')
fig.legend(loc="upper left", bbox_to_anchor=(0.1, 0.9))
plt.tight_layout()
plt.savefig('Results/2024_IBB_vs_2023_GE_Merged_SideBySide.png', dpi=300)
plt.show()


# Statistics

data_E = ses[ses['SES'] == 'E']
data_E['diff'] = data_E['istanbul_bb_2024_chp'] - data_E['istanbul_bb_2019_chp']
diff_data = data_E['diff'].dropna()
shapiro_stat, shapiro_p = stats.shapiro(diff_data)
print(f"Shapiro-Wilk Test Statistic: {shapiro_stat:.4f}, p-value: {shapiro_p:.4f}")

w_stat, w_pvalue = stats.wilcoxon(data_E['istanbul_bb_2024_chp'], data_E['istanbul_bb_2019_chp'])
print("Wilcoxon signed-rank test statistic:", w_stat)
print("Wilcoxon signed-rank test p-value:", w_pvalue)


# Procurements

ekap = pd.read_excel('IBB_Procurements_2014-2024.xlsx')

replacements = {
    'HAMİDİYE KAYNAK SULA RI SAN.TURİZM VE TİC .A.Ş.': 'HAMİDİYE KAYNAK SULARI SAN.TURİZM VE TİC.A.Ş.',
    'İSTANBUL ASFALT FABR İKALARI SANAYİ VE TİCARET A.Ş.': 'İSTANBUL ASFALT FABRİKALARI SANAYİ VE TİCARET ANONİM ŞİRKETİ',
    'İSPARK İSTANBUL OTOP ARK İŞLETMELERİ TİCARET A.Ş.': 'İSPARK İSTANBUL OTOPARK İŞLETMELERİ TİCARET A.Ş.',
    'İSTON İSTANBUL BETON ELEMANL.VE HAZIR BET ON FAB.SAN.VE TİC.AŞ': 'İSTON İSTANBUL BETON ELEMANLARI VE HAZIR BETON FABRİKALARI SANAYİ VE TİCARET ANONİM ŞİRKETİ',
    'İSTANBUL KÜLTÜR VE S ANAT ÜRÜNLERİ Tİ C.A.Ş.': 'İSTANBUL KÜLTÜR VE SANAT ÜRÜNLERİ TİC.A.Ş.',
    'İSTANBUL AĞAÇ VE PEY EĞİ.HİZ.HAY.BAH. İŞL.SAN.TİC.A.Ş': 'İSTANBUL AĞAÇ PEYZAJ EĞİTİM HİZMETLERİ VE HAYVANAT BAHÇESİ İŞLETMECİLİĞİ SANAYİ VE TİCARET A.Ş.',
    'BELBİM ELEKTRONİK PARA VE ÖDEME HİZMETLERİ ANONİM ŞİRKETİ': 'BELBİM ELEKTRONİK PARA VE ÖDEME HİZMETLERİ ANONİM ŞİRKETİ.',
    'İSPER İSTANBUL PERSONEL YÖNETİM A.Ş.': 'İSPER İSTANBUL PERSONEL YÖNETİM ANONİM ŞİRKETİ',
    'İSTAÇ İSTANBUL ÇEVRE YÖNETIMI SAN. VE TIC. A.Ş.': 'İSTAÇ İSTANBUL ÇEVRE YÖNETİMİ SANAYİ VE TİCARET ANONİM ŞİRKETİ',
    'İSTANBUL HALK EKMEK GIDA VE SANAYİ A.Ş.': 'İSTANBUL HALK EKMEK UN VE UNLU MADDELER GIDA SANAYİ VE TİCARET ANONİM ŞİRKETİ',
    'METRO İSTANBUL SANAYİ VE TİCARET A.Ş': 'METRO İSTANBUL SANAYİ VE TİCARET A.Ş.',
    'İSTANBUL SPOR ETKİNL İKLERİ VE İŞLET. TİC.A.Ş.': 'İSTANBUL SPOR ETKİNLİKLERİ VE İŞLETMECİLİĞİ TİCARET ANONİM ŞİRKETİ',
    'İSTANBUL UYGULAMALI GAZ.VE EN.TEK.AR MÜH.SAN.VE Tİ.AŞ': 'İSTANBUL UYGULAMALI GAZ VE ENERJİ TEKNOLOJİLERİ ARAŞTIRMA MÜHENDİSLİK SANAYİ VE TİCARET ANONİM ŞİRKETİ'}

ekap['IbbIstirak'] = ekap['IbbIstirak'].replace(replacements)

ekap = ekap.dropna(subset=['ihale_dolar'])
ekap.reset_index(drop=True, inplace=True)
ekap['SozlesmeTarihi'] = pd.to_datetime(ekap['SozlesmeTarihi'], format='%d.%m.%Y', errors='coerce')

# Remove rows where FirmaAdi appears in IbbIstirak, then keep groups having tenders both before and after 2019-04-01
ekap = ekap[~ekap['FirmaAdi'].isin(ekap['IbbIstirak'].dropna())].reset_index(drop=True)
ekap = ekap.groupby('IbbIstirak').filter(lambda group: (group['SozlesmeTarihi'] <= '2019-03-31').any() and (group['SozlesmeTarihi'] >= '2019-04-01').any())
ekap.reset_index(drop=True, inplace=True)

df_before = ekap[ekap['SozlesmeTarihi'] <= pd.Timestamp('2019-03-31')]
df_after  = ekap[ekap['SozlesmeTarihi'] >= pd.Timestamp('2019-04-01')]


def split_companies(firma_str):
    """Split a string of company names separated by commas."""
    return [item.strip() for item in str(firma_str).split(",")]

def get_companies_from_series(series):
    """Return a flat set of individual company names from a pandas Series."""
    return set([company.strip() for s in series for company in str(s).split(",")])

def aggregate_job_counts(df, filter_field=None, filter_value=None, company_field='FirmaAdi'):
    """
    Count tenders per company.

    - For "Açık" usul (when filter_field=='IhaleUsulu' and filter_value=="Açık"),
      count the tender only if split FirmaAdi has more than 1 company.
    - Otherwise, count if at least one company appears.
    """
    counts = defaultdict(int)
    if filter_field is not None and filter_value is not None:
        df_filtered = df[df[filter_field] == filter_value]
    else:
        df_filtered = df
    for _, row in df_filtered.iterrows():
        companies = split_companies(row[company_field])
        if filter_field == 'IhaleUsulu' and filter_value == "Açık":
            if len(companies) > 1:
                for comp in companies:
                    counts[comp] += 1
        else:
            if len(companies) > 0:
                for comp in companies:
                    counts[comp] += 1
    return counts

def aggregate_value_sum(df, filter_field=None, filter_value=None, company_field='FirmaAdi', value_field='ihale_dolar'):
    """
    Sum tender values per company.
    
    For each tender row, split the FirmaAdi and divide the tender value equally among the companies.
    For "Açık" usul (filter_field=='IhaleUsulu' and filter_value=="Açık"), count only if more than 1 company is present.
    """
    sums = defaultdict(float)
    if filter_field is not None and filter_value is not None:
        df_filtered = df[df[filter_field] == filter_value]
    else:
        df_filtered = df
    for _, row in df_filtered.iterrows():
        companies = split_companies(row[company_field])
        if len(companies) > 0:
            if filter_field == 'IhaleUsulu' and filter_value == "Açık":
                if len(companies) > 1:
                    share = row[value_field] / len(companies)
                    for comp in companies:
                        sums[comp] += share
            else:
                share = row[value_field] / len(companies)
                for comp in companies:
                    sums[comp] += share
    return sums

def count_and_turnover_for_firm(df, firm):
    cnt = 0
    turnover = 0.0
    for _, row in df.iterrows():
        if firm in split_companies(row['IbbIstirak']):
            cnt += 1
            turnover += row['ihale_dolar']
    return cnt, turnover

def calculate_ratios(df, period_name):
    total_count = len(df)
    total_value = df['ihale_dolar'].sum()
    print("Period:", period_name)
    print("Number of Procurement:", total_count, "Total Turnover:", total_value)
    for m in methods:
        df_m = df[df['IhaleUsulu'] == m]
        count_ratio = len(df_m) / total_count if total_count > 0 else 0
        value_ratio = df_m['ihale_dolar'].sum() / total_value if total_value > 0 else 0
        print("  {}: Turnover = {:.3%}, Turnover Share = {:.3%}".format(m, count_ratio, value_ratio))
    print()


decision_number=3

job_counts_before = aggregate_job_counts(df_before, company_field='FirmaAdi')
avg_tenders = sum(job_counts_before.values()) / len(job_counts_before)
print("Average number of tender: {:.2f}".format(avg_tenders))
firms_min2 = {firm: cnt for firm, cnt in job_counts_before.items() if cnt >= decision_number}
print(f"Number of firms awarded minimum {decision_number} tenders:", len(firms_min2))

non_open_methods = ["Pazarlık", "Belli İstekliler Arasında"]
df_before_nonopen = df_before[df_before['IhaleUsulu'].isin(non_open_methods)]
counts_before_nonopen = aggregate_job_counts(df_before_nonopen, company_field='FirmaAdi')
top10_before = sorted(counts_before_nonopen.items(), key=lambda x: x[1], reverse=True)[:10]
top10_before_set = set([firm for firm, cnt in top10_before])
companies_after_all = get_companies_from_series(df_after['FirmaAdi'])
count_common = len(top10_before_set.intersection(companies_after_all))
print("Number of the firms, which were most frequently awarded non-open tenders during the AKP period, continued receiving contracts under the new CHP administration:", count_common)

all_counts_nonopen = aggregate_job_counts(df_before_nonopen, company_field='FirmaAdi')
all_firms_sorted = sorted(all_counts_nonopen.items(), key=lambda x: x[1], reverse=True)
top_25_count = math.ceil(0.25 * len(all_firms_sorted))
top_25_firms = set([firm for firm, cnt in all_firms_sorted[:top_25_count]])
companies_after_all = get_companies_from_series(df_after['FirmaAdi'])
common_25 = top_25_firms.intersection(companies_after_all)
ratio_25 = len(common_25) / len(top_25_firms) if top_25_firms else 0
print("Number of firms in top %25:", len(top_25_firms))
print("Awarded in CHP period:", len(common_25))
print("Ratio: {:.2%}".format(ratio_25))

firms_nonopen_min2 = {firm for firm, cnt in all_counts_nonopen.items() if cnt >= decision_number}
common_nonopen_min2 = firms_nonopen_min2.intersection(companies_after_all)
ratio_6 = len(common_nonopen_min2) / len(firms_nonopen_min2) if firms_nonopen_min2 else 0
print(f"Number of firm awarded with minimum {decision_number} non-open tender:", len(firms_nonopen_min2))
print("Awarded in CHP period:", len(common_nonopen_min2))
print("Ratio: {:.2%}".format(ratio_6))


all_counts_all = aggregate_job_counts(df_before, company_field='FirmaAdi')
firms_all_min2 = {firm for firm, cnt in all_counts_all.items() if cnt >= decision_number}
common_all_min2 = firms_all_min2.intersection(companies_after_all)
ratio_7 = len(common_all_min2) / len(firms_all_min2) if firms_all_min2 else 0
print(f"Number of firm awarded with minimum {decision_number} tender:", len(firms_all_min2))
print("Awarded in CHP period:", len(common_all_min2))
print("Ratio: {:.2%}".format(ratio_7))

values_before_all = aggregate_value_sum(df_before, company_field='FirmaAdi', value_field='ihale_dolar')
firms_all_min2 = {firm for firm, cnt in all_counts_all.items() if cnt >= decision_number}
total_turnover_before = df_before['ihale_dolar'].sum()
turnover_firms_before = sum([values_before_all.get(firm, 0) for firm in firms_all_min2])
share_turnover_before = turnover_firms_before / total_turnover_before if total_turnover_before > 0 else 0

values_after_all = aggregate_value_sum(df_after, company_field='FirmaAdi', value_field='ihale_dolar')
total_turnover_after = df_after['ihale_dolar'].sum()
turnover_firms_after = sum([values_after_all.get(firm, 0) for firm in firms_all_min2])
share_turnover_after = turnover_firms_after / total_turnover_after if total_turnover_after > 0 else 0

print("Turnover Share in AKP Period: {:.2%}".format(share_turnover_before))
print("Turnover Share in CHP Period: {:.2%}".format(share_turnover_after))


# Table 1

hamidiye = "HAMİDİYE KAYNAK SULARI SAN.TURİZM VE TİC.A.Ş."
ihe = "İSTANBUL HALK EKMEK UN VE UNLU MADDELER GIDA SANAYİ VE TİCARET ANONİM ŞİRKETİ"

total_turnover_before = df_before['ihale_dolar'].sum()
total_turnover_after = df_after['ihale_dolar'].sum()

count_16_before, turnover_16_before = count_and_turnover_for_firm(df_before, hamidiye)
count_16_after, turnover_16_after = count_and_turnover_for_firm(df_after, hamidiye)
print(hamidiye)
print("  AKP Period - Number of Procurement: {}, Turnover: {:.2f}, Turnover Share: {:.2%}".format(
    count_16_before, turnover_16_before, turnover_16_before/total_turnover_before if total_turnover_before>0 else 0))
print("  CHP Period - Number of Procurement: {}, Turnover: {:.2f}, Turnover Share: {:.2%}".format(
    count_16_after, turnover_16_after, turnover_16_after/total_turnover_after if total_turnover_after>0 else 0))

count_17_before, turnover_17_before = count_and_turnover_for_firm(df_before, ihe)
count_17_after, turnover_17_after = count_and_turnover_for_firm(df_after, ihe)
print(ihe)
print("  AKP Period - Number of Procurement: {}, Turnover: {:.2f}, Turnover Share: {:.2%}".format(
    count_17_before, turnover_17_before, turnover_17_before/total_turnover_before if total_turnover_before>0 else 0))
print("  CHP Period - Number of Procurement: {}, Turnover: {:.2f}, Turnover Share: {:.2%}".format(
    count_17_after, turnover_17_after, turnover_17_after/total_turnover_after if total_turnover_after>0 else 0))


# Table 3

methods = ["Açık", "Pazarlık", "Belli İstekliler Arasında"]

calculate_ratios(df_before, "AKP")
calculate_ratios(df_after, "CHP")
